keep = ls()

##############
#Prepare Data#
##############

#Load township data
ia_twp_table = fread('./data/suffrage_referenda/cleaned/ia_twp_analysis_cleaned.csv')
wi_twp_table = fread('./data/suffrage_referenda/cleaned/wi_twp_analysis_cleaned.csv')


#Make variables: Iowa
#####################

#Enlistment rate
ia_twp_table[, vet_pct := vets_alt / elig_1868]
ia_twp_table[, vet_pct_s := vets_alt_s / elig_1868]

#Suffrage vote-share
ia_twp_table[, suff_57_yes := suff_yes_57 / (elig_1868)]
ia_twp_table[, suff_65_yes := suff_yes_68 / elig_1868]
ia_twp_table[, suff_57_no := suff_no_57 / (elig_1868)]
ia_twp_table[, suff_65_no := suff_no_68 / elig_1868]
ia_twp_table[, diff_suff := suff_65_yes - suff_57_yes]

#Eligible voter
ia_twp_table[, elig_pct := elig_1857 / elig_1868]
ia_twp_table[, change := elig_1868_70 / elig_1868]

#Estimation sample
ia_twp_table[, sample_1 := vet_pct < 0.6 & suff_57_yes < 1 & suff_65_yes < 1 ]
ia_twp_table[, sample_2 := vet_pct < 0.6 & suff_57_yes < 1 & suff_65_yes < 1 & change %between% c(0.5,1.5)]

#Make variables: Wisconsin
##########################

#Enlistment rate
wi_twp_table[, vet_pct := vets_alt / elig_1865]
wi_twp_table[, vet_pct_s := vets_alt_s / elig_1865]
wi_twp_table[, vet_pct_h := vets_alt_h / elig_1865 ]

#Suffrage Vote
wi_twp_table[, suff_57_yes := suff_yes_1857 / (elig_1865)]
wi_twp_table[, suff_65_yes := suff_yes_1865 / elig_1865]
wi_twp_table[, suff_57_no := suff_no_1857 / (elig_1865)]
wi_twp_table[, suff_65_no := suff_no_1865 / elig_1865]
wi_twp_table[, diff_suff := suff_65_yes - suff_57_yes]

#Gubernatorial Vote
wi_twp_table[, gov_57_d := gov_d_1857 / elig_1865]
wi_twp_table[, gov_65_d := gov_d_1865 / elig_1865]
wi_twp_table[, gov_57_r := gov_r_1857 / elig_1865]
wi_twp_table[, gov_65_r := gov_r_1865 / elig_1865]
wi_twp_table[, gov_57_r_diff := gov_57_r - suff_57_yes]

#Eligible Voters
wi_twp_table[, elig_pct := elig_1857 / elig_1865]
wi_twp_table[, change := elig_1865_70 / elig_1865]

#Turnout
wi_twp_table[, turnout_65 := (suff_yes_1865 + suff_no_1865)/elig_1865]
wi_twp_table[, turnout_57 := (suff_yes_1857 + suff_no_1857)/elig_1865]
wi_twp_table[, gov_to_1865 := gov_r_1865 + gov_d_1865]
wi_twp_table[, suff_to_1865 := suff_yes_1865 + suff_no_1865]
wi_twp_table[, to_1865 := .SD %>% as.matrix %>% rowMaxs(), .SDcols = c('gov_to_1865', "suff_to_1865")]
wi_twp_table[, gov_to_1857 := gov_r_1857 + gov_d_1857]
wi_twp_table[, suff_to_1857 := suff_yes_1857 + suff_no_1857]
wi_twp_table[, to_1857 := .SD %>% as.matrix %>% rowMaxs(), .SDcols = c('gov_to_1857', "suff_to_1857")]

#Estimation Sample:
wi_twp_table[, use_unit := suff_yes_1865 >=0 & 
               suff_no_1865 >= 0 & 
               gov_r_1865 >= 0 & 
               gov_d_1865 >= 0 &
               (suff_yes_1865 + suff_no_1865 + gov_r_1865 + gov_d_1865) > 0]
wi_twp_table[, sample_1 := use_unit & vet_pct < 1 & suff_57_yes < 1 & suff_65_yes < 1 ]
wi_twp_table[, sample_2 := use_unit & vet_pct < 1 & suff_57_yes < 1 & suff_65_yes < 1 & change %between% c(0.5,1.5)]

################################################
#Ecological Inference: Veterans for Suffrage#
################################################

wi_twp_ei = wi_twp_table[(sample_1) &
                           suff_yes_1865 >=0 & 
                           suff_no_1865 >= 0 &
                           (suff_yes_1865 + suff_no_1865) > 0 &
                           suff_yes_1857 >= 0 &
                           suff_no_1857 >= 0 &
                           (suff_yes_1857 + suff_no_1857) > 0, list(
                             x0 = vets_alt / elig_1865,
                             x1 = vets_alt_h / (elig_1865 - (vets_alt - vets_alt_h)),
                             t0 = suff_yes_1857 / elig_1865,
                             t1 = suff_yes_1865 / (elig_1865 - (vets_alt - vets_alt_h)),
                             n0 = elig_1865,
                             n1 = elig_1865 - (vets_alt - vets_alt_h),
                             c = County_Cluster,
                             wi_unit_id,
                             z0 = gov_r_1857 / elig_1865,
                             z1 = gov_r_1865 / (elig_1865 - (vets_alt - vets_alt_h)),
                             no0 = suff_no_1857 / elig_1865,
                             no1 =  suff_no_1865 / (elig_1865 - (vets_alt - vets_alt_h)),
                             to0 = to_1857 / elig_1865,
                             to1 = to_1865 / elig_1865
                           )
                         ]
#Enlistment
x0 = wi_twp_ei[,x0] 
x1 = wi_twp_ei[,x1] 
#Suffrage vote
t0 = wi_twp_ei[,t0]
t1 = wi_twp_ei[,t1]
#Eligible voters
n0 = wi_twp_ei[,n0]
n1 = wi_twp_ei[,n1]
#County
c = wi_twp_ei[,c]
#Republican voteshare
z0 = wi_twp_ei[,z0]
z1 = wi_twp_ei[,z1]
#No on suffrage
no0 = wi_twp_ei[,no0]
no1 = wi_twp_ei[,no1]
#Turnout
to0 = wi_twp_ei[,to0]
to1 = wi_twp_ei[,to1]

#Drop outliers
idx = ((t1-t0) %between% c(-0.2, 0.4)) & x0 < 0.7



####################################
#Ecological Bounds on Wisconsin ATT#
####################################

Bv_0 = generateBounds(x0[idx], t0[idx], n0[idx])
Bn_0 = generateBounds(1-x0[idx], t0[idx], n0[idx])
Bv_1 = generateBounds(x1[idx], t1[idx], n1[idx])
Bn_1 = generateBounds(1-x1[idx], t1[idx], n1[idx])

d1u = Bv_1$hbdu0 - Bv_0$hbdl0
d1l = Bv_1$hbdl0 - Bv_0$hbdu0

d2u = Bn_1$hbdu0 - Bn_0$hbdl0
d2l = Bn_1$hbdl0 - Bn_0$hbdu0

#upper
d1u - d2l
#lower
d1l - d2u
#estimate
lm(t1[idx] - t0[idx] ~ x0[idx], weights = n0[idx]) %>% coef %>% .[2]

###########
#Figure C1#
###########

#Get contextual effect bounds
wi_bounds = ei_bounds(x0[idx], t0[idx], t1[idx], n0[idx])

p = ggarrange(wi_bounds$v_plot + ggtitle("Wisconsin: Enlistees"),
              wi_bounds$nv_plot + ggtitle("Wisconsin: Civilians"), 
              labels = c("A", "B"),
              ncol = 2, nrow = 1, common.legend = T, legend = 'right') 
ggsave("./output/appendix/Figure_C1.pdf", p,  width = 8.5, height = 6)

###########
#Figure C2#
###########

#Low 1857 suffrage vote
t0_50 = t0[idx] %>% median
bottom_50 = t0[idx] < t0_50
i = t0[idx][bottom_50] %>% sort %>% unique

low_s_bounds = data.table(threshold = i, l = as.numeric(NA), 
                          u = as.numeric(NA), 
                          n = as.numeric(NA), 
                          n_t = as.numeric(NA), flag = NA,
                          att = as.numeric(NA))

#Loop over 1857 suffrage vote
for (t in i) {
  use = (t0 <= t) & idx
  Bv_0 = generateBounds(x0[use], t0[use], n0[use])
  Bn_0 = generateBounds(1- x0[use], t0[use], n0[use])
  Bv_1 = generateBounds(x1[use], t1[use], n1[use])
  Bn_1 = generateBounds(1- x1[use], t1[use], n1[use])
  k = 2
  d1u = evaluateBounds(Bv_1)$CI_x_upper[k] - evaluateBounds(Bv_0)$CI_x_lower[k]
  d1l = evaluateBounds(Bv_1)$CI_x_lower[k] - evaluateBounds(Bv_0)$CI_x_upper[k]
  d2u = evaluateBounds(Bn_1)$CI_x_upper[k] - evaluateBounds(Bn_0)$CI_x_lower[k]
  d2l = evaluateBounds(Bn_1)$CI_x_lower[k] - evaluateBounds(Bn_0)$CI_x_upper[k]
  
  att_hat = (lm(t1[use] ~ x1[use], weights = n1[use]) %>% coef %>% .[2]) -
    (lm(t0[use] ~ x0[use], weights = n0[use]) %>% coef %>% .[2])
  
  low_s_bounds[threshold == t, c('l', 'u', 'n', 'n_t', 'att') := list(d1l - d2u, d1u - d2l, sum(n0[use]), sum(use), att_hat)]
}

low_s_bounds[, bounds_type := "Includes Zero"]
low_s_bounds[l > 0, bounds_type := "Above Zero"]
low_s_bounds[u < 0, bounds_type := "Below Zero"]

p = ggplot(low_s_bounds, aes(x = threshold, y = att, ymin = l, ymax = u, colour = bounds_type)) +
  geom_linerange() +
  geom_point(colour = "black") + 
  theme_bw() +
  theme(legend.position = 'bottom') +
  xlab("Maximum Township 1857 Suffrage Vote") +
  ylab("ATT (Bounds)") + 
  geom_hline(yintercept = 0) +
  scale_colour_manual(values = c("green", "gray"))

ggsave("./output/appendix/Figure_C2.pdf", p, width = 8.5, height = 6)

###########
#Figure C3#
###########

#Low Republican/Suffrage townships
i = z0[idx] %>% sort %>% unique
i = i[i <= 0.05]

low_r_bounds = data.table(threshold = i, l = as.numeric(NA), 
                          u = as.numeric(NA), 
                          n = as.numeric(NA), 
                          n_t = as.numeric(NA), 
                          att = as.numeric(NA))

#Loop over thresholds
for (t in i) {
  use = (z0 <= t) & (t0 <= z0) & idx
  m = lm(t1[use] - t0[use] ~ x1[use] + z0[use] , weights = n0[use] ) %>% coeftest(., vcov. = vcovHC(.))
  m_alt = lm(t1[use] - t0[use] ~ x1[use] + z0[use] , weights = n0[use] ) %>% coeftest(., vcov. = vcovHC(., 'const'))
  
  att_hat = m[2,1]
  se = max(c(m[2,2], m_alt[2,2]))
  ci_u = att_hat + se*2
  ci_l = att_hat - se*2
  low_r_bounds[threshold == t, c('l', 'u', 'n', 'n_t', 'att') := list(ci_l, ci_u, sum(n0[use]), sum(use), att_hat)]
}


p = ggplot(low_r_bounds, aes(x = threshold, y = att, ymin = l, ymax = u)) +
  geom_linerange() +
  geom_point(colour = "black") + 
  theme_bw() +
  theme(legend.position = 'bottom') +
  xlab("Maximum Township 1857 Rep. and Suffrage Vote") +
  ylab("ATT") + 
  geom_hline(yintercept = 0) +
  scale_colour_manual(values = c("gray"))

ggsave("./output/appendix/Figure_C3.pdf", p, width = 8.5, height = 6)

###############
#Bounds on ATT#
###############

#Define function to get bounds
myBounds = function(x,y,n,k = 2) {
  #Relabel
  b_x = x
  w_x = 1 - x
  
  #Get mean t, x
  y_bar = weighted.mean(y, n)
  b_x_bar = weighted.mean(b_x, n)
  w_x_bar = weighted.mean(w_x, n)
  
  #Apply Jiang et al bounds to b, w
  Bb_0 = generateBounds(b_x, y, n, printSummary = F)
  Bw_0 = generateBounds(w_x, y, n, printSummary = F)
  
  #get bounds from Bb and Bw model
  b_u_1 = evaluateBounds(Bb_0)$CI_x_upper[k]
  b_l_1 = evaluateBounds(Bb_0)$CI_x_lower[k]
  w_u_1 = evaluateBounds(Bw_0)$CI_x_upper[k]
  w_l_1 = evaluateBounds(Bw_0)$CI_x_lower[k]
  
  #Calculate implied bounds
  w_l_2 = (y_bar - b_x_bar*b_u_1)/w_x_bar
  w_u_2 = (y_bar - b_x_bar*b_l_1)/w_x_bar
  b_l_2 = (y_bar - w_x_bar*w_u_1)/b_x_bar
  b_u_2 = (y_bar - w_x_bar*w_l_1)/b_x_bar
  
  #return
  out = list(
    b_l = max(b_l_1, b_l_2),
    b_u = min(b_u_1, b_u_2),
    w_l = max(w_l_1, w_l_2),
    w_u = min(w_u_1, w_u_2),
    y_bar = y_bar,
    b_x_bar = b_x_bar,
    w_x_bar = w_x_bar
  )
  return(out)
}

#Define sample (suffrage vote < 4 ppt in 1857)
t = 0.0411
use = (t0 < t) & idx

#0.25 SE on bounds
k = 2

#Define possible "trenders" toward suffrage
#republicans not voting for suffrage
z0_nro = ifelse(z0-t0 < 0 , 0, z0-t0)
#democrats not vorting NO on suffrage
d0 = ifelse((to0 - z0) < 0 , 0, (to0 - z0))
d0_yro = ifelse((d0 - no0) < 0, 0, d0 - no0)


#Bounds on ATT
##############

#Get time 0 bounds on veterans support for suffrage
b_t0 = myBounds(x0[use], t0[use], n0[use], k = k)
#Get time 1 bounds on veterans support for suffrage
b_t1 = myBounds(x1[use], t1[use], n1[use], k = k)

#Taking lower bounds on veterans
y11_n_min_l = pmax(b_t1$w_u, b_t0$b_u)
y11_n_max_l = b_t1$b_l
#Taking upper bounds on veterans
y11_n_min_u = pmax(b_t1$w_l, b_t0$b_l)
y11_n_max_u = b_t1$b_u

#ATT bounds taking lower veteran bound
b_t1$b_l - y11_n_min_l
b_t1$b_l - y11_n_max_l

#ATT bounds taking upper veteran bound
b_t1$b_u - y11_n_min_u
b_t1$b_u - y11_n_max_u


#Plausibility of bounds: 
#size of "trending toward suffrage" population

#Assuming all "effects" of enlistment due to selection:
#"trenders" make up this fraction of population:
fraction_suffrage_predisposed = (b_t1$y_bar - b_t0$y_bar)
fraction_suffrage_predisposed

#Compare this against the fraction pro-suffrage in 1857:
b_t0$y_bar

#Compare this against the "trend" toward suffrage state-wide:
weighted.mean(t1 - t0, n0)

#Compare this against the population 
#most likely to "Trend" toward pro-suffrage
#Republicans who did not vote yes in 1857
#Democrats who did not vote no in 1857

b_z0 = myBounds(x0[use], z0_nro[use], n0[use], k = k)
b_d0 = myBounds(x0[use], d0_yro[use], n0[use], k = k)
fraction_partisan_disposed = b_z0$y_bar + b_d0$y_bar
fraction_partisan_disposed

#What fraction of these "likely" trenders
#actually voted for suffrage in 1865?
#Upper bound is:
b_trend = myBounds((z0_nro+ d0_yro)[use], t1[use], n0[use], k = k)
b_trend$b_u
  
#What fraction of enlistees were "likely" trenders
#Upper bound is:
b_trend = myBounds(x0[use], (z0_nro+ d0_yro)[use], n0[use], k = k)
b_trend$b_u

#Plausibility of bounds: 
#magnitude of selection bias
 
#probability of those "trending" pro suffrage
#selecting into service
#pr(enlist|trending) = p(trending | enlist)*p(enlist)/p(trending)

# at upper EI bound on veterans
(b_t1$b_u-b_t0$b_l)*b_t1$b_x_bar/(b_t1$y_bar - b_t0$y_bar)
# at lower EI bound on veterans
(b_t1$b_l-b_t0$b_u)*b_t0$b_x_bar/(b_t1$y_bar - b_t0$y_bar)

#probability of those "not trending" pro suffrage
#selecting into service
#pr(enlist|!trending) = p(trending | enlist)*p(enlist)/p(trending)

# at upper EI bound on veterans
(1 - (b_t1$b_u))*b_t0$b_x_bar / (1 - (b_t1$y_bar))
# at lower EI bound on veterans
(1 - (b_t1$b_l))*b_t0$b_x_bar / (1 - (b_t1$y_bar))

#Probability of democrat/republican voters in 1857 enlisting
d_t0 = myBounds(d0[idx], x0[idx], n0[idx])
z_t0 = myBounds(z0[idx], x0[idx], n0[idx])

#Maximum selection bias into enlistment of Republicans vs Democrats
z_t0$b_u/d_t0$b_l

#Minimum selection bias into enlistment of Republicans vs Democrats
z_t0$b_l/d_t0$b_u

##############################################################
#Cleanup
rm(list = setdiff(ls(), c(keep, 'keep')))
gc()
